/*****************************************************************************************************************************************
******Code for Currie, Mueller-Smith, Rossin-Slater "Violence while in Utero: The Impacts of Assaults During Pregnancy on Birth Outcomes"*
******July 2020***********************************************************************************************************************/

*****This code cleans the merged NYC crimes/births data and conducts analyses to produce the following output in the manuscript:

*** Table 3, row 3
*** Appendix Tables A9, A12

* ---- Session setup: no paging, large matrix limit, empty memory, no open log ----
set more off
set matsize 10000

clear all

cap log close

***** Paths to data set locations, results, graphs *****
* Fill in each directory before running. File names are appended directly to
* these macros, so each path needs its trailing directory separator.
global bulk 
global home 
global results 
global graphs

**** first, run clean_birthoutcomes.do file to clean the births data, then the mergebirths_tocrime.do file to merge births data to crime data

set scheme s1color

* Load the merged births/crimes file. The path is quoted so that a directory
* name containing spaces does not break the command.
use "${bulk}births_crimes.dta", clear

* Conception years 2004 through 2012 inclusive (missing years are dropped too)
keep if inrange(concep_year, 2004, 2012)

*** singleton births only
drop if singleton != 1

********************************************************
***** LINKING SIBLINGS ****
********************************************************

* Primary mother identifier: mom (anonymized) SSN.
* Stored as double: generate defaults to float, which cannot represent every
* integer above 16,777,216, so large IDs could be rounded onto each other and
* unrelated mothers would be linked as one.
gen double mom_ssn_id = M_SSN_ID

bys mom_ssn_id: gen numsibs =_N

tab numsibs


*A few errors, recode as missing:
* (implausibly large groups sharing one SSN id; the group sizes below are
*  specific to this extract of the data)
codebook mom_ssn_id if numsibs==205644

codebook mom_ssn_id if numsibs==430

replace mom_ssn_id = . if numsibs==205644 | numsibs==430

*for those who are missing SSN, use mom's full maiden name + DOB (anonymized)

egen mom_nameDOB_id = group(M_FNAME_ANON M_MNAME_ANON M_LNAME_ANON M_DOBM_ANON M_DOBDD_ANON M_DOBYY_ANON), missing

sum mom_nameDOB_id
*max is 1,683,249 -- so will add 2,000,000 to the SSN to make sure we have unique IDs
* Stop if the name/DOB ids ever reach the offset: the two id ranges would
* overlap and different mothers could share a mom_id.
* NOTE(review): this also presumes the SSN ids are non-negative -- confirm.
assert r(max) < 2000000

* Combined mother id: name/DOB id when the SSN is missing, shifted SSN id otherwise.
gen double mom_id = .
gen str mom_id_source = ""
replace mom_id = mom_nameDOB_id if mom_ssn_id==.
replace mom_id_source = "name/DOB" if mom_ssn_id==.
replace mom_id = mom_ssn_id + 2000000 if mom_ssn_id!=.
replace mom_id_source = "SSN" if mom_ssn_id!=.


* Recount births per mother using the combined id
drop numsibs

bys mom_id: gen numsibs = _N
tab numsibs

tab mom_id_source if numsibs>=2

* Sibling sample: mothers with at least two births in the data
gen sibsample = numsibs>=2

label var sibsample "Siblings Sample"
label var numsibs "Number Siblings"


/********* KEEP ONLY SIBLINGS ***********/
drop if sibsample != 1

* Mothers in a single family home for all births: the within-mother mean of
* house must equal 1 (births with missing house are ignored by the mean).

bysort mom_id: egen mean_house = mean(house)

drop if mean_house != 1

* Remaining number of births and of distinct mothers
count
codebook mom_id

*** child's year/month of birth

gen child_ym = ym(birth_year, birth_month)
format child_ym %tm


*** birth interval: number months from previous child
* The first birth observed for each mother gets interval 0.

sort mom_id child_ym
by mom_id: gen prev_ym = child_ym[_n-1]
by mom_id: gen birth_int = 0 if _n==1
by mom_id: replace birth_int = child_ym-prev_ym if _n>1

tab birth_int

* Diagnostic: intervals that could not be computed (missing birth year/month)
count if birth_int>=.

* Interval categories in 12-month bands.
gen birth_int0 = (birth_int==0)
gen birth_int1 = (birth_int>0 & birth_int<=12)
gen birth_int2 = (birth_int>12 & birth_int<=24)
gen birth_int3 = (birth_int>24 & birth_int<=36)
gen birth_int4 = (birth_int>36 & birth_int<=48)
* Missing values compare greater than any number in Stata, so the upper bound
* is needed to keep unknown intervals out of the "more than 48 months" group.
gen birth_int5 = (birth_int>48 & birth_int<.)


**** additional outcomes (lbw, preterm,  low apgar)
* Every indicator is 0/1 where its source variable is observed and missing otherwise.

* birth weight thresholds
gen lbw = (birthweight<2500) if !missing(birthweight)
label var lbw "Low Birth Weight"
gen vlbw = (birthweight<1500) if !missing(birthweight)
label var vlbw "Very Low Birth Weight"
gen hbw = (birthweight>4000) if !missing(birthweight)
label var hbw "High Birth Weight"

* gestation thresholds (weeks)
gen pret = (gest_weeks<37) if !missing(gest_weeks)
label var pret "Pre-Term Birth"
gen vpret = (gest_weeks<34) if !missing(gest_weeks)
label var vpret "Very Pre-Term Birth"

* Apgar scores below 7
gen apgar1_low = (apgar1<7) if !missing(apgar1)
label var apgar1_low "1-Min Apgar less than 7"
gen apgar5_low = (apgar5<7) if !missing(apgar5)
label var apgar5_low "5-Min Apgar less than 7"

* weight gain below 15 or above 40
gen wgtgain_low = (wgtgain<15) if !missing(wgtgain)
label var wgtgain_low "Wgt Gain less than 15lbs"
gen wgtgain_high = (wgtgain>40) if !missing(wgtgain)
label var wgtgain_high "Wgt Gain more than 40lbs"

* either flag equal to 1; defined only when both component flags are observed
gen any_breastfeed = inlist(1, excl_breastfeed, some_breastfeed) if !missing(excl_breastfeed, some_breastfeed)
label var any_breastfeed "Any Breastfeeding"

gen any_abncong = inlist(1, any_abnormal, any_congen) if !missing(any_abnormal, any_congen)
label var any_abncong "Any Abnormal or Congenital Conditions"


**** parity
* Categories from the number of previous live births, plus a flag for missing.
gen parity1     = (prevlivebirths==0)
gen parity2     = (prevlivebirths==1)
gen parity3plus = (prevlivebirths>=2 & !missing(prevlivebirths))
gen paritymiss  = (prevlivebirths==.)


*** mom/dad foreign: flag missing values, then recode them as 0
foreach parent in mom dad {
    gen `parent'_foreignmiss = (`parent'_foreign==.)
    replace `parent'_foreign = 0 if `parent'_foreignmiss==1
}

**** father info missing
* Equals 1 only when age, education, race and foreign-born status are all flagged missing.
gen dadmiss = (dadagemiss==1 & dadeducmiss==1 & dadmissingrace==1 & dad_foreignmiss==1)
label var dadmiss "Father Info Missing"
tab dadmiss




/*************** CRIME VARIABLES *************/

* Name stems used to build the tot<stem>_* and any<stem>_* variables
global crimeclass felony misdemeanor violation

global crimetypes arson assault f_assault burglary drugs kidnap fraud larceny harassment homicide dui weapons other

* all stems in one list: overall count first, then classes, then types
local crimestems crimes $crimeclass $crimetypes


**** NOTE: we calculate the total number of crimes during pregnancy in the mergebirths_tocrime.do file


*here, calculate the total number of crimes in 10 months after pregnancy: post10-post19
* rowtotal() treats a missing month as zero, so these totals are never missing.

foreach stem of local crimestems {
    * collect the ten monthly variables tot<stem>_post10 ... tot<stem>_post19
    local postmonths
    forvalues m = 10/19 {
        local postmonths `postmonths' tot`stem'_post`m'
    }
    egen tot`stem'_post = rowtotal(`postmonths')
}


* 0/1 indicators for any crime during pregnancy and in the post window
foreach stem of local crimestems {
    gen any`stem'_preg = (tot`stem'_preg>0) if !missing(tot`stem'_preg)
    gen any`stem'_post = (tot`stem'_post>0) if !missing(tot`stem'_post)
}

sum anyassault_preg
sum anyassault_post

**** crime/assault rates by months relative to conception
* For each monthly count tot<stem>_pre1-pre12 and tot<stem>_post0-post45,
* create the matching 0/1 indicator any<stem>_<month>.

local crimestems crimes $crimeclass $crimetypes

forvalues m = 1/12 {
    foreach stem of local crimestems {
        gen any`stem'_pre`m' = (tot`stem'_pre`m'>0) if !missing(tot`stem'_pre`m')
    }
}

forvalues m = 0/45 {
    foreach stem of local crimestems {
        gen any`stem'_post`m' = (tot`stem'_post`m'>0) if !missing(tot`stem'_post`m')
    }
}


* Assault rates for pre9 down to pre1
local prelist
forvalues m = 9(-1)1 {
    local prelist `prelist' anyassault_pre`m'
}
sum `prelist'

* Assault rates for post0-post9, then post10-post19
forvalues start = 0(10)10 {
    local stop = `start' + 9
    local postlist
    forvalues m = `start'/`stop' {
        local postlist `postlist' anyassault_post`m'
    }
    sum `postlist'
}

/******* VARIATION WITHIN SIBLINGS ******/
sort mom_id child_ym

* Within-mother max and min of the pregnancy-assault indicator:
* min==0 & max==1 marks mothers whose births differ in exposure.
foreach stat in max min {
    by mom_id: egen `stat'_anyassault_preg = `stat'(anyassault_preg)
}

codebook mom_id if max_anyassault_preg==1 & min_anyassault_preg==0

label var anyassault_preg "Assault During Pregnancy"



/***** BBQ SAMPLE: Bronx, Brooklyn, and Queens only ****/
* The borough recorded at the mother's first birth (earliest child_ym) is
* copied to all of her births; codes 2, 3 and 4 define the BBQ sample.
destring mom_borough, replace
sort mom_id child_ym
by mom_id: gen amom_borough = mom_borough if _n==1
bysort mom_id: egen mom_borough1 = mean(amom_borough)
gen BBQ = inlist(mom_borough1, 2, 3, 4)

* Distinct BBQ mothers by exposure pattern: discordant, never, always
codebook mom_id if BBQ==1 & min_anyassault_preg==0 & max_anyassault_preg==1
codebook mom_id if BBQ==1 & min_anyassault_preg==0 & max_anyassault_preg==0
codebook mom_id if BBQ==1 & min_anyassault_preg==1 & max_anyassault_preg==1

/******************** DEFINING GLOBALS WITH OUTCOMES AND CALCULATING INDICES ***********************************/

/********************BBQ Sample Only **************/
drop if BBQ != 1

**NOTE: outcomes are oriented such as a higher value means: worse infant health, more medical services, worse behaviors 

*need to reorient some outcomes : birth weight, gestation length, cmale
* Continuous outcomes are flipped by subtracting them from the sample maximum.

foreach y of varlist birthweight gest_weeks {
    egen max_`y' = max(`y') if `y'!=.
    gen inv_`y' = max_`y' - `y'
}

* Binary outcomes are flipped as one minus the indicator.
gen cfem = 1 - cmale
gen nowic = 1 - wic


**mean and SD of control group for each variable, then calculate z-score
* Control group = births with anyassault_preg==0. The control mean and SD are
* first computed on those rows only, then spread to every row (CM_, CSD_) so
* that all births are standardized against the same values.
local zvars inv_birthweight lbw vlbw hbw inv_gest_weeks pret vpret apgar1_low any_abncong nicu cfem child_death firsttri_pnc numvisits nowic momsmoke momdruguse mom_depressed wgtgain_low wgtgain_high csection induced anycompl

foreach y of varlist `zvars' {
    * control-group mean
    egen m_`y' = mean(`y') if anyassault_preg==0
    summarize m_`y'
    egen CM_`y' = mean(m_`y')
    summarize CM_`y'

    * control-group standard deviation
    egen sd_`y' = sd(`y') if anyassault_preg==0
    summarize sd_`y'
    egen CSD_`y' = mean(sd_`y')
    summarize CSD_`y'

    gen Z_`y' = (`y' - CM_`y') / CSD_`y'
}

* spot checks on a few z-scores
summarize Z_inv_birthweight
summarize Z_vlbw
summarize Z_nowic

**create indices
* Each index is the row mean of its component z-scores (rowmean skips missing components).
egen birthout_index = rowmean(Z_vlbw Z_vpret Z_apgar1_low Z_nicu Z_any_abncong Z_child_death)
egen medical_index = rowmean(Z_firsttri_pnc Z_numvisits Z_induced Z_csection Z_anycompl)
egen behavioral_index = rowmean(Z_momsmoke Z_momdruguse Z_mom_depressed Z_wgtgain_low Z_wgtgain_high Z_nowic)
egen appendix_index = rowmean(Z_vlbw Z_vpret Z_apgar1_low Z_nicu Z_any_abncong Z_child_death Z_inv_birthweight Z_lbw Z_inv_gest_weeks Z_pret)
*note: appendix_index includes also the components of birthout_index (so, it's including all possible birth outcomes)



* Outcome lists, one per results table; the order must match the column titles
* given to esttab further down.
global birthout vlbw vpret apgar1_low nicu any_abncong child_death birthout_index appendix_index
global medical firsttri_pnc numvisits induced csection anycompl medical_index
global behavioral momsmoke momdruguse mom_depressed wgtgain_low wgtgain_high nowic behavioral_index
global appendix birthweight lbw gest_weeks pret appendix_index


* The four summary indices
global indices birthout_index appendix_index medical_index behavioral_index 


**** mom and dad (time-varying) controls -- 
* Grouped by theme: parental age, father-missing flag and marital status,
* parental education, father's race/ethnicity and nativity, parity, birth interval.
global controls ///
    momage1 momage2 momage3 momage4 momagemiss ///
    dadage1 dadage2 dadage3 dadage4 dadagemiss ///
    dadmiss mom_married ///
    momeduc1 momeduc2 momeduc3 momeduc4 momeducmiss ///
    dadeduc1 dadeduc2 dadeduc3 dadeduc4 dadeducmiss ///
    dadnonhispanicwhite dadhispanic dadnonhispanicblack dadotherrace dadmissingrace ///
    dad_foreign ///
    parity1 parity2 parity3plus paritymiss ///
    birth_int1 birth_int2 birth_int3 birth_int4 birth_int5


**********APPENDIX TABLE A9
* Mother fixed-effects models: each outcome is regressed on the pregnancy-assault
* indicator, the time-varying controls and conception year/month dummies, with
* standard errors clustered by mother.
* The mother fixed effects are absorbed with areg; regress has no absorb()
* option, so the same specification under regress stops with an error.
* After each model the dependent-variable mean over the estimation sample is
* stored as scalar dv for the table footer.

* --- birth outcomes ---
eststo clear
foreach out in $birthout {
    eststo: areg `out' anyassault_preg $controls i.concep_year i.concep_month, absorb(mom_id) vce(cluster mom_id)
    qui sum `e(depvar)' if e(sample)==1
    estadd scalar dv=`r(mean)'
}

#delimit ;
esttab using "${results}birthout_anyassault_MomFEBBQ.tex", replace fragment booktabs keep(anyassault_preg) 
se star(* .1 ** .05 *** .01)  stats(dv N,  label("Dept. var mean" "Indiv. obs.")) 
mtitles("VLBW" "V Pret" "Low 1m Apg" "NICU" "Abn/Con" "Death" "Index" "Br. Index") 
wrap label varwidth(20) brackets   ;
#delimit cr



* --- medical services ---
eststo clear
foreach out in $medical {
    eststo: areg `out' anyassault_preg $controls i.concep_year i.concep_month, absorb(mom_id) vce(cluster mom_id)
    qui sum `e(depvar)' if e(sample)==1
    estadd scalar dv=`r(mean)'
}

#delimit ;
esttab using "${results}medical_anyassault_MomFEBBQ.tex", replace fragment booktabs keep(anyassault_preg) 
se star(* .1 ** .05 *** .01)  stats(dv N,  label("Dept. var mean" "Indiv. obs.")) 
mtitles("PNC 1Tri" "NVis" "Induc" "Csec" "Compl" "Index") 
wrap label varwidth(20) brackets   ;
#delimit cr

* --- maternal behaviors ---
eststo clear
foreach out in $behavioral {
    eststo: areg `out' anyassault_preg $controls i.concep_year i.concep_month, absorb(mom_id) vce(cluster mom_id)
    qui sum `e(depvar)' if e(sample)==1
    estadd scalar dv=`r(mean)'
}

#delimit ;
esttab using "${results}behavioral_anyassault_MomFEBBQ.tex", replace fragment booktabs keep(anyassault_preg) 
se star(* .1 ** .05 *** .01)  stats(dv N,  label("Dept. var mean" "Indiv. obs.")) 
mtitles("Smoke" "Drugs" "Depr" "Low Wgt" "High Wgt" "No WIC" "Index") 
wrap label varwidth(20) brackets   ;
#delimit cr

* --- additional birth outcomes ---
eststo clear
foreach out in $appendix {
    eststo: areg `out' anyassault_preg $controls i.concep_year i.concep_month, absorb(mom_id) vce(cluster mom_id)
    qui sum `e(depvar)' if e(sample)==1
    estadd scalar dv=`r(mean)'
}

#delimit ;
esttab using "${results}appendix_anyassault_MomFEBBQ.tex", replace fragment booktabs keep(anyassault_preg) 
se star(* .1 ** .05 *** .01)  stats(dv N,  label("Dept. var mean" "Indiv. obs.")) 
mtitles("Birwt" "LBW" "Gest" "Pret" "Index") 
wrap label varwidth(20) brackets   ;
#delimit cr

***** TABLE 3, Row 3
* Same specification for the four summary indices; only N is reported.

eststo clear
foreach out in $indices {
    eststo: areg `out' anyassault_preg $controls i.concep_year i.concep_month, absorb(mom_id) vce(cluster mom_id)
    qui sum `e(depvar)' if e(sample)==1
    estadd scalar dv=`r(mean)'
}

#delimit ;
esttab using "${results}indices_anyassault_MomFEBBQ.tex", replace fragment booktabs keep(anyassault_preg) 
se star(* .1 ** .05 *** .01)  stats(N,  label("Indiv. obs.")) 
mtitles("Birth Out" "Broad Birth Out" "Med Serv" "Behav/Well") 
wrap label varwidth(20) brackets   ;
#delimit cr
**************


***** APPENDIX TABLE A12

*Placebo: Use only children with assaults post-pregnancy and their siblings
*Note: drop moms that ever have assault during pregnancy (max_anyassault_preg==1)

label var anyassault_post "Assault Post Pregnancy"

* The dependent-variable mean is not reported in this table, so it is not computed.
eststo clear
foreach out in $indices {
    eststo: areg `out' anyassault_post $controls i.concep_year i.concep_month if max_anyassault_preg==0, absorb(mom_id) vce(cluster mom_id)
}

#delimit ;
esttab using "${results}indices_anyassaultPOST_MomFEBBQ.tex", replace fragment booktabs keep(anyassault_post) 
se star(* .1 ** .05 *** .01)  stats(N,  label("Indiv. obs.")) 
mtitles("Birth Out"  "Broad Birth Out" "Med Serv" "Behav/Well") 
wrap label varwidth(20) brackets   ;
#delimit cr



